/*NOTE: This is model code to illustrate the logic used to generate the datasets for analysis in the SCARP pilot.
This code is not intended to run as is - due to Epic restrictions on the sharing of intellectual property all 
of the table and attribute names have been "masked" with names enclosed in square brackets if they were pulled 
from Clarity (EPIC).*/

/*****************************************************
PROJECT:  BRIDGE C2 
PILOT: SCRAP
Date: 

DESCRIPTION: 
Use ASCEND approach to compare CVC, CVC and 
BC cancer prevention services in patients with
	No SDH Screening
	Documented Need
	Documented No Need
Stratified by SDH need: Food, Housing, Transportation
STUDY PERIOD: 7/1/2016-2/29/2020		


DETAILS (This Syntax): 
Pull independent variables for CVC_cohort

INPUT DATA: 
SCRAP.CVC_patients

OUTPUT DATA:
SCRAP.CVC_exclusions_LU
SCRAP.CVC_exclusions_detail
SCRAP.CVC_first_exclusion
scrap.CVC_patient_covariates


SECTIONS:
A. Pull exclusion dates and apply, update observation length;
B. Define SDH Screening status for food, housing and transportation insecurity
	No SDH Screening
	Documented Need
	Documented No Need
C. Pull demographics
	race_ethnicity: Hispanic, non-Hispanic Black, non-Hispanic white, Non-Hispanic other, no data
	Preferred language: English/non-English
	Age, primary payor at index
	FPL on or after Index
	Visits per year


*****************************************************/
options compress=yes reuse=yes;
libname SCRAP "e:\\sasroot\BRIDGE C2\SCRAP\";
libname RD odbc noprompt="dsn=research_dev;Trusted_Connection=yes" bulkload=yes;

/***************************************************************/

/***************************************************************/
/*A. Pull exclusion dates and apply, update observation length;*/
/***************************************************************/

options mprint symbolgen mlogic;
%let patients=scrap.SCRAP_CVC_PATIENTS;/*location of your denominator file*/
%let idname=raw_patid;/*name of the id variable that links to Clarity [patient identifier]*/
%let LU_output=scrap.CVC_exclusion_LU;/*output file of exclusion codes*/
%let end_date='29FEB2020'd;
%let CVC_exclusion_detail=SCRAP.CVC_exclusions_detail;
%let first_CVC_exclude=SCRAP.CVC_First_exclusion;

%include 'e:\sasroot\BRIDGE C2\SAS Macros\CVC_exclusion_dates_macro_revised.sas';
%CVC_ex;

%macro CvC_ex;

proc sql;
create table RD.jpo_cvc_macro_patients as 
select * from &patients;

/*lu_output file created from internal code groupers*/

proc sql;
create table RD.jpo_CVC_macro_exclusion_codes as select * from &LU_output;
quit;

/*exclusion dates*/
/*from surgical history*/
proc sql;
connect to odbc as mycon
(required = "dsn=research_dev;Trusted_Connection=yes");
create table exclude_shx as 
select * from connection to mycon (
     SELECT den.*
	,eap.[procedure name]	
  	,shx.[contact date] 
	,shx.[SURGICAL HISTORY START DATE]
     ,shx.[SURGICAL HISTORY DATE] 
     ,'Surgical Hx' AS Exclusion_Source
    FROM research_dev.dbo.jpo_cVc_macro_patients  AS den
     INNER JOIN clarity.dbo.[SURGICAL HISTORY]        AS shx
      ON den.&idname = shx.[patient identifier]
     INNER JOIN research_dev.dbo.jpo_CVC_macro_exclusion_codes AS cpc
      ON shx.[PROCEDURE IDENTIFIER] = cpc.[PROCEDURE IDENTIFIER]
	  left join clarity.dbo.[procedures records EAP] eap on shx.[PROCEDURE IDENTIFIER]=eap.[PROCEDURE IDENTIFIER]
	where cpc.compiled_context='EAP')
	;      
quit;


data exclude_shx (keep=raw_patid ADVANCE_patid [procedure name] exclude_date exclusion_source);
set exclude_shx;
format exclude_date date9.;
exclude_date=datepart([SURGICAL HISTORY START DATE]);
if exclude_date=. then exclude_date=datepart([contact date]);
if exclude_date>&end_date then delete;
run;

/*from medical_hx*/
proc sql;
connect to odbc as mycon
(required = "dsn=research_dev;Trusted_Connection=yes");
create table exclude_mhx as 
select * from connection to mycon (
     SELECT den.*
	,edg.[diagnosis name]	
  	,mhx.[contact date] 
	,mhx.[MEDICAL HISTORY START DATE]
     ,mhx.[MEDICAL HISTORY DATE] 
     ,'Medical Hx' AS Exclusion_source
    FROM research_dev.dbo.jpo_CVC_macro_patients  AS den
     INNER JOIN clarity.dbo.[MEDICAL HISTORY]        AS mhx
      ON den.&idname = mhx.[patient identifier]
     INNER JOIN jpo_CVC_macro_exclusion_codes AS cpc
      ON mhx.[diagnosis identifier] = cpc.[diagnosis identifier]
	  LEFT JOIN CLARITY.dbo.[DIAGNOSIS EDG] edg
	  	on mhx.[diagnosis identifier]=edg.[diagnosis identifier]);      
quit;


data exclude_mhx (keep=raw_patid ADVANCE_patid [diagnosis name] exclude_date exclusion_source);
set exclude_mhx;
format exclude_date date9.;
exclude_date=datepart([MEDICAL HISTORY START DATE]);
if exclude_date=. then exclude_date=datepart([contact date]);
if exclude_date>&end_date then delete;
run;

/*from encounter dx */
proc sql;
connect to odbc as mycon
(required = "dsn=research_dev;Trusted_Connection=yes");
create table exclude_patenc_dx as 
select * from connection to mycon (
     SELECT den.*
	,edg.[diagnosis name]	
  	,pe.[contact date] 
     ,'Pat Enc Dx' AS Exclusion_Source
    FROM research_dev.dbo.jpo_CVC_macro_patients  AS den
     INNER JOIN clarity.dbo.[PATIENT ENCOUNTER]       AS pe
	 on den.&idname=pe.[patient identifier]
     INNER JOIN clarity.dbo.[PATIENT ENCOUNTER DIAGNOSIS]        AS pedx
      ON pe.[patient encounter contact serial number identifier] = pedx.[patient encounter contact serial number identifier]
     INNER JOIN research_dev.dbo.jpo_CVC_macro_exclusion_codes AS cpc
      ON pedx.[diagnosis identifier] = cpc.[diagnosis identifier]
	  LEFT JOIN clarity.dbo.[DIAGNOSIS EDG] edg
	  	on pedx.[diagnosis identifier]=edg.[diagnosis identifier]);      
quit;


data exclude_patenc_dx (keep=raw_patid ADVANCE_patid [diagnosis name] exclusion_source exclude_date);
set exclude_patenc_dx;
format exclude_date date9.;
exclude_date=datepart([contact date]);
if exclude_date>&end_date then delete;
run;

/*from problem list*/
 proc sql;
connect to odbc as mycon
(required = "dsn=research_dev;Trusted_Connection=yes");
create table exclude_Plist as 
select * from connection to mycon (
     SELECT den.*
	,edg.[diagnosis name]	
  	,pl.[noted date]
	,pl.[date of entry]
     ,'Problem list dx' AS Exclusion_source
    FROM research_dev.dbo.jpo_CVC_macro_patients  AS den
     INNER JOIN clarity.dbo.[problem list]        AS pl
      ON den.&idname = pl.[patient identifier]
     INNER JOIN research_dev.dbo.jpo_CVC_macro_exclusion_codes AS cpc
      ON pl.[diagnosis identifier] = cpc.[diagnosis identifier]
	 INNER JOIN clarity.dbo.[DIAGNOSIS EDG] edg  
		on pl.[diagnosis identifier]=edg.[diagnosis identifier] );      
quit; 


data exclude_plist (keep=raw_patid ADVANCE_patid [diagnosis name] exclude_date exclusion_source);
set exclude_plist;
format exclude_date date9.;
exclude_date=datepart([noted date]);
if exclude_date=. then exclude_date=datepart([date of entry]);
if exclude_date>&end_date then delete;
run;

 proc sql;
connect to odbc as mycon
(required = "dsn=research_dev;Trusted_Connection=yes");
create table exclude_smartdata as 
select * from connection to mycon (
SELECT den.&idname, 
		sedv.[patient link identifier],
           'SmartData Documentation',
           CAST(pev.[contact date] AS VARCHAR (255))
    FROM Clarity.dbo.[SmartData Elements Metadata] sedv
        INNER JOIN Clarity.dbo.[SmartData Elements Values] sevv
            ON sedv.[SmartData Element Identifier] = sevv.[SmartData Element Identifier]
        INNER JOIN research_dev.dbo.jpo_CVC_macro_exclusion_codes hc
            ON sedv.[ELEMENT IDENTIFIER] = hc.ELEMENT_ID
        LEFT JOIN Clarity.dbo.[General Use Notes Information] hno
            ON (
                   hno.[NOTE IDENTIFIER] = sedv.[RECORD IDENTIFIER]
                   AND sedv.[CONTEXT NAME] = 'NOTE'
               )
               OR
               (
                   hno.[NOTE IDENTIFIER] = sedv.SRC_[NOTE IDENTIFIER]
                   AND sedv.[CONTEXT NAME] = 'ENCOUNTER'
               )
        LEFT JOIN clarity.dbo.[PATIENT ENCOUNTER] pev
            ON pev.[patient encounter contact serial number identifier] = hno.[patient encounter contact serial number identifier]
		inner join research_dev.dbo.jpo_CVC_macro_patients as den
			on pev.[patient identifier]=den.&idname
    WHERE sevv.[SmartData Elements Value] IN ( '1', 'Y' ))
;
quit;

data &CVC_exclusion_detail;
set exclude_mhx exclude_patenc_dx exclude_plist exclude_shx;
run;
proc sql;
create table &first_CVC_exclude as
select den.&idname, 
min(exclude_date) as exclude_date format date9.
from &CVC_exclusion_detail den group by den.&idname;
quit;

proc datasets library=reschdev;
delete jpo_CVC_macro_patients
jpo_cvc_macro_exclusion_codes;
quit;
%mend;




options nomprint nosymbolgen nomlogic;

/*get last_encounter before exclude_date*/
/*(observation period runs from index visit to last encounter before exclusion)*/
proc sql;
create table last_enc as
select x.*,
max(e.[contact date]) as last_encounter format date9.
from scrap.CVC_first_exclusion x
left join scrap.scrap_potential_cohort_pc_enc e
on x.raw_patid=e.raw_patid
where e.[contact date]<x.exclude_date
group by x.raw_patid,x.exclude_date ;

proc sql;
create table dropped_pat as
select distinct raw_patid from scrap.CVC_first_exclusion
where raw_patid ^in (select distinct raw_patid from last_enc);


*merge exclusions to patients;
proc sql;
create table CVC_Pat as
select p.*,
x.exclude_date,
x.last_encounter as last_enc2,
d.sas_death_date format date9.
from SCRAP.SCRAP_CVC_patients p
left join last_enc x
on p.raw_patid=x.raw_patid
left join ochin.death d on p.raw_patid=d.raw_patid
where p.raw_patid^in(select distinct raw_patid from dropped_pat);


*truncate observation period at last_enc2;
data CVC_pat;
set CVC_pat;
format observation_end date9.;
where sas_death_date=. or sas_death_date>last_enc;
if last_enc2=. then observation_end=last_enc;
if exclude_date^=. or death_date^=. then do;
	if last_enc2<last_enc then
		observation_end=last_enc2;
	if last_enc2>=last_enc then
		observation_end=last_enc;
end;
observation_length=yrdif(index_visit, observation_end,'age');
run;

data CVC_pat2;
set CVC_pat;
if observation_length<1 and exclude_date^=1 then delete;
run;

/*****************************************************************************
B. Define SDH Screening status for food, housing and transportation insecurity
	No SDH Screening
	Documented Need
	Documented No Need
*******************************************************************************/
/*link patients to screens at their primary_clinic*/
proc sql;
create table pat_screens as
select 
p.raw_patid as raw_patid2,
p.delivery_site_id,
p.index_visit,
p.observation_end,
sdh.*,
domain='Housing Instability' as housing_screen,
domain='Food Insecurity' as food_screen,
domain='Transportation' as Transportation_screen,
sdh_positive_screen='1' as positive_screen
from CVC_pat2 p
left join scrap.scrap_sdh_screening_3_domains sdh
on p.raw_patid=sdh.raw_patid and p.delivery_site_id=sdh.delivery_site_id
and sdh.screen_date between p.index_visit and p.observation_end
order by p.raw_patid, domain, screen_date;
quit;

*summarize observation period screens;
proc sql;
create table screen_summary as
select raw_patid2 as raw_patid,
delivery_site_id,
max(Food_screen) as screened_food,
max(food_screen*positive_screen) as pos_food,
max(Housing_screen) as screened_housing,
max(Housing_screen*positive_screen) as pos_housing,
max(Transportation_screen) as screened_transportation,
max(Transportation_screen*positive_screen) as pos_transportation
from pat_screens group by raw_patid2, delivery_site_id;

/*time to first screen*/
data first_screen (keep=raw_patid2 first_food first_housing first_transportation);
set pat_screens;
by raw_patid2 domain;
retain first_food first_housing first_transportation;
format first_food first_housing first_transportation date9.;
if first.raw_patid2 then do;
	first_food=.;
	first_housing=.;
	first_transportation=.;
end;
if first.raw_patid2 or first.domain then do;
	if domain='Food Insecurity' then first_food=screen_date;
	if domain='Housing Instability' then first_housing=screen_date;
	if domain='Transportation' then first_transportation=screen_date;
end;
if last.raw_patid2 then output;
run;

proc sql;
create table CVC_pat3 as
select p.*,
s.screened_food,
f.first_food,
s.pos_food,
s.screened_housing,
f.first_housing,
s.pos_housing,
s.screened_transportation,
f.first_transportation,
s.pos_transportation
from CVC_pat2 p left join screen_summary s
on p.raw_patid=s.raw_patid
left join first_screen f
on p.raw_patid=f.raw_patid2;
quit;

data CVC_pat3;
set CVC_pat3;
length SDH_food SDH_Housing SDH_Transportation $20;
time_to_food=first_food-index_visit;
time_to_housing=first_housing-index_visit;
time_to_transportation=first_transportation-index_visit;
if screened_food=0 then SDH_food='3_Never Screened';
else if screened_Food=1 then do;
	if pos_Food=0 then SDH_Food='2_Negative Screen';
	if pos_Food=1 then SDH_Food='1_Positive Screen';
end;
if screened_housing=0 then SDH_housing='3_Never Screened';
else if screened_housing=1 then do;
	if pos_housing=0 then SDH_housing='2_Negative Screen';
	if pos_housing=1 then SDH_housing='1_Positive Screen';
end;
if screened_transportation=0 then SDH_transportation='3_Never Screened';
else if screened_transportation=1 then do;
	if pos_transportation=0 then SDH_transportation='2_Negative Screen';
	if pos_transportation=1 then SDH_transportation='1_Positive Screen';
end;
run;

data scrap.temp_CVC_pat3;
set CVC_pat3;
run;

/*****************************************************************************
C. Pull demographics and encounter variables
	race_ethnicity: Hispanic, non-Hispanic Black, non-Hispanic white, Non-Hispanic other, no data
	Preferred language: English/non-English
	age at index
	primary payor at index
	FPL on or after Index
	PC visits per year in observation period

*******************************************************************************/
/*demographics*/
proc sql;
create table CVC_pat4 as
select p.*,
d.hispanic_description,
d.race_description,
d.pat_pref_language_spoken_desc
from CVC_pat3 p left join ochin.demographic d
on p.raw_patid=d.raw_patid;

data CVC_pat4;
set CVC_pat4;
/*Note that ASCEND manuscript groups missing language with other*/
if pat_pref_language_spoken_desc='English'
	then language_c='English';
else if pat_pref_language_spoken_desc='Spanish, Castilian'
	then language_c='Spanish';
else if pat_pref_language_spoken_desc^ in('English', 'Spanish, Castilian') 
	then language_c='Other';
if Hispanic_description='No' then do;
	if race_description='White' then race_ethnic='Nonhispanic White';
	if race_description='Black or African American' then race_ethnic='Nonhispanic Black';
	if race_description ^in('White','No information','Black or African American', 'Unknown','Refuse to answer')
		then race_ethnic='Nonhispanic Other';
end;
if Hispanic_description='Yes' then race_ethnic='Hispanic';
if race_ethnic=' ' then race_ethnic='Unknown';
run;


/*PC encounters*/
proc sql;
create table pc_enc as
select c.raw_patid,
count(distinct e.encounterid) as PC_encounters
from CVC_pat4 c left join 
scrap.scrap_potential_cohort_pc_enc e
on c.raw_patid=e.raw_patid and
	e.[contact date] between c.index_visit and c.observation_end
group by c.raw_patid;
create table CVC_pat5 as
select c.*,
e.PC_encounters,
round(e.PC_encounters/observation_length,1) as yearly_visits
from CVC_pat4 c left join pc_enc e
on c.raw_patid=e.raw_patid;
 
data CVC_pat5;
set CVC_pat5;
if yearly_visits<3 then visit_cat='1-2';
if yearly_visits>=3 and yearly_visits<5 then visit_cat='3-4';
if yearly_visits>=5 and yearly_visits<7 then visit_cat='5-6';
if yearly_visits>=7 then visit_cat='7+';
run;

/*index visit variables*/
proc sql;
create table encounters as
select p.raw_patid,
p.index_visit,
e.[contact date],
e.age_at_encounter_date,
e.FPL_percentage,
e.payor_type_research
from CVC_pat5 p left join scrap.scrap_potential_cohort_pc_enc e
on p.raw_patid=e.raw_patid and
e.[contact date]>=p.index_visit
order by p.raw_patid, e.[contact date];

data index;
set encounters;
by raw_patid;
if first.raw_patid then output;
run;

data first_fpl;
set encounters;
where fpl_percentage^=.;
by raw_patid;
if first.raw_patid then output;
run;

proc sql;
create table CVC_pat6 as
select p.*,
i.age_at_encounter_date,
i.payor_type_research,
f.fpl_percentage
from CVC_pat5 p left join index i
on p.raw_patid=i.raw_patid
left join first_fpl f on 
p.raw_patid=f.raw_patid;


data scrap.CVC_patient_covariates;
set CVC_pat6;
if fpl_percentage=. then FPL_cat='No data';
if fpl_percentage^=. and FPL_percentage<=100 then FPL_cat='<=100';
if fpl_percentage>100 and FPL_percentage<=200 then FPL_cat='<=200';
if fpl_percentage>200 then FPL_cat='>200';
if payor_type_research=' ' then payor_type_research='Uninsured';
if age_at_encounter_date<40 then age_cat='23-39';
if age_at_encounter_date>=40 and age_at_encounter_date<50 then age_cat='40-49';
if age_at_encounter_date>=50 and age_at_encounter_date<65 then age_cat='50-64';
if age_at_encounter_date>=65 then age_cat='65-70';
run;

data SCRAP.CVC_patient_covariates;
set SCRAP.CVC_patient_covariates;
english=pat_pref_language_spoken_desc='English';
rename age_at_encounter_date=age_at_index_visit;
run; 

data SCRAP.CVC_patient_covariates;
set SCRAP.CVC_patient_covariates;
length obs_cat $10;
if years_observation<2 then obs_cat='<2';
if years_observation>=2 then obs_cat='2-3.6';
if age_cat='50-64' then age_cat='50-63';
run; 

